This is a sample notebook, modified from https://github.com/dswh/voila-covid-19-dashboard/blob/master/notebooks/covid_19_dashboard.ipynb
The demo is a dashboard for viewing COVID-19 cases worldwide.
The datasets originated from Johns Hopkins University and can be downloaded here: https://github.com/CSSEGISandData/COVID-19
cases_country.csv was converted from the summary .csv that came from this location: https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports/csse_covid_19_daily_reports/
# importing libraries
from __future__ import print_function
from IPython.core.display import display, HTML
from ipywidgets import interact, interactive, fixed, interact_manual
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
#import folium
import plotly.graph_objects as go
import seaborn as sns
import ipywidgets as widgets
# Global time-series CSVs, one file per metric, read in the order
# confirmed -> deaths -> recovered.
time_series_paths = (
    "./time_series_covid19_confirmed_global.csv",
    "./time_series_covid19_deaths_global.csv",
    "./time_series_covid19_recovered_global.csv",
)
confirmed_df, deaths_df, recovered_df = map(pd.read_csv, time_series_paths)

# Latest summary snapshot (see the note on cases_country.csv at the top).
country_df = pd.read_csv('./cases_country.csv')

# Preview the first rows of the confirmed-cases frame.
confirmed_df.head()
| Province/State | Country/Region | Lat | Long | 1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | ... | 12/28/20 | 12/29/20 | 12/30/20 | 12/31/20 | 1/1/21 | 1/2/21 | 1/3/21 | 1/4/21 | 1/5/21 | 1/6/21 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 51280 | 51350 | 51405 | 51526 | 51526 | 51526 | 51526 | 53011 | 53105 | 53105 |
| 1 | NaN | Albania | 41.15330 | 20.168300 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 56572 | 57146 | 57727 | 58316 | 58316 | 58991 | 59438 | 59623 | 60283 | 61008 |
| 2 | NaN | Algeria | 28.03390 | 1.659600 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 98631 | 98988 | 99311 | 99610 | 99897 | 100159 | 100408 | 100645 | 100873 | 101120 |
| 3 | NaN | Andorra | 42.50630 | 1.521800 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 7875 | 7919 | 7983 | 8049 | 8117 | 8166 | 8192 | 8249 | 8308 | 8348 |
| 4 | NaN | Angola | -11.20270 | 17.873900 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 17296 | 17371 | 17433 | 17553 | 17568 | 17608 | 17642 | 17684 | 17756 | 17864 |
5 rows × 355 columns
# Dimensions of the confirmed-cases frame as a (rows, columns) tuple.
confirmed_df.shape
(272, 355)
# Preview the first rows of the deaths time series.
deaths_df.head()
| Province/State | Country/Region | Lat | Long | 1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | ... | 12/28/20 | 12/29/20 | 12/30/20 | 12/31/20 | 1/1/21 | 1/2/21 | 1/3/21 | 1/4/21 | 1/5/21 | 1/6/21 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 2174 | 2179 | 2181 | 2191 | 2191 | 2191 | 2191 | 2237 | 2244 | 2244 |
| 1 | NaN | Albania | 41.15330 | 20.168300 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1164 | 1170 | 1174 | 1181 | 1181 | 1190 | 1193 | 1199 | 1210 | 1217 |
| 2 | NaN | Algeria | 28.03390 | 1.659600 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 2737 | 2745 | 2751 | 2756 | 2762 | 2769 | 2772 | 2777 | 2782 | 2786 |
| 3 | NaN | Andorra | 42.50630 | 1.521800 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 83 | 84 | 84 | 84 | 84 | 84 | 84 | 84 | 84 | 84 |
| 4 | NaN | Angola | -11.20270 | 17.873900 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 403 | 403 | 405 | 405 | 405 | 407 | 408 | 408 | 410 | 413 |
5 rows × 355 columns
# Preview the first rows of the deaths time series, then report its
# dimensions as a (rows, columns) tuple.
deaths_df.head()
deaths_df.shape
(272, 355)
# Preview the first rows of the recovered time series, then report its
# dimensions as a (rows, columns) tuple.
recovered_df.head()
recovered_df.shape
(257, 355)
## Cleaning the data
# Normalise every header to lower case so that later lookups and renames
# can use a single spelling for each column.
for frame in (confirmed_df, deaths_df, recovered_df, country_df):
    frame.columns = [label.lower() for label in frame.columns]

# Show the normalised headers of the confirmed-cases frame.
confirmed_df.columns
Index(['province/state', 'country/region', 'lat', 'long', '1/22/20', '1/23/20',
'1/24/20', '1/25/20', '1/26/20', '1/27/20',
...
'12/28/20', '12/29/20', '12/30/20', '12/31/20', '1/1/21', '1/2/21',
'1/3/21', '1/4/21', '1/5/21', '1/6/21'],
dtype='object', length=355)
# Shorten the location headers of the three time-series frames.
# The keys must be lower case because the headers were lower-cased just
# above; `DataFrame.rename` silently ignores keys that do not match, so a
# mixed-case key such as "province/State" would leave the column unrenamed.
time_series_renames = {"province/state": "state", "country/region": "country"}
confirmed_df = confirmed_df.rename(columns=time_series_renames)
deaths_df = deaths_df.rename(columns=time_series_renames)
recovered_df = recovered_df.rename(columns=time_series_renames)

# The summary file uses an underscore-separated header instead.
country_df = country_df.rename(columns={'country_region': 'country'})

# Show the renamed headers of the confirmed-cases frame.
confirmed_df.columns
Index(['state', 'country', 'lat', 'long', '1/22/20', '1/23/20', '1/24/20',
'1/25/20', '1/26/20', '1/27/20',
...
'12/28/20', '12/29/20', '12/30/20', '12/31/20', '1/1/21', '1/2/21',
'1/3/21', '1/4/21', '1/5/21', '1/6/21'],
dtype='object', length=355)
# Sum each headline metric over every row of the summary frame.
totals = {
    metric: int(country_df[metric].sum())
    for metric in ('confirmed', 'deaths', 'recovered', 'active')
}
confirmed_total = totals['confirmed']
deaths_total = totals['deaths']
recovered_total = totals['recovered']
active_total = totals['active']

# Print one total per line, in the order confirmed, deaths, recovered, active.
for metric_total in totals.values():
    print(metric_total)
87186540 1883761 48777336 36522525
# Empty FigureWidget; in the code visible here it is only placed inside the
# hidden VBox containers.
fig = go.FigureWidget(layout=go.Layout())

# Summary rows ordered by confirmed cases, highest first.
# A styled static alternative would be:
# country_df.sort_values('confirmed', ascending= False).head(10).style.background_gradient(cmap='copper')
sorted_country_df = country_df.sort_values(by='confirmed', ascending=False)
def highlight_col(x):
    """Build the CSS table used to colour the case-count columns.

    Intended for ``Styler.apply(..., axis=None)``, which hands over the whole
    frame and expects back a frame of CSS strings with the same index and
    columns.

    The columns are selected by name rather than by position, so the
    highlighting stays on the right data whatever extra columns the frame
    carries, and a frame lacking one of the columns is simply left unstyled
    there instead of raising ``IndexError``.

    Parameters
    ----------
    x : pandas.DataFrame
        The frame being styled.

    Returns
    -------
    pandas.DataFrame
        Same index/columns as ``x``; ``''`` everywhere except
        ``confirmed`` (blue), ``deaths`` (red) and ``recovered`` (grey).
    """
    column_styles = {
        'confirmed': 'background-color: blue',
        'deaths': 'background-color: red',
        'recovered': 'background-color: grey',
    }
    styles = pd.DataFrame('', index=x.index, columns=x.columns)
    for column, css in column_styles.items():
        if column in styles.columns:
            styles[column] = css
    return styles
def show_latest_cases(n):
    """Return a styled table of the ``n`` rows with the most confirmed cases.

    ``n`` is converted with ``int()``, so numeric strings such as the
    ``'10'`` default handed to ``interact`` are accepted. The rows come from
    ``country_df`` sorted by ``confirmed`` in descending order and are
    coloured by ``highlight_col``.
    """
    row_count = int(n)
    ranked = country_df.sort_values('confirmed', ascending=False)
    top_rows = ranked.head(row_count)
    return top_rows.style.apply(highlight_col, axis=None)
# Interactive table; the string default makes the row count an editable text value.
interact(show_latest_cases, n='10')

# Green-bordered container for `fig`. display='none' keeps it hidden;
# remove that argument and re-run the cell to make the figure visible.
ipywLayout = widgets.Layout(border='solid 2px green', display='none')
widgets.VBox([fig], layout=ipywLayout)
import plotly.graph_objects as go
# Number of leading metadata columns (state, country, lat, long) that precede
# the first date column in the time-series frames.
_FIRST_DATE_COLUMN = 4


def _cases_over_time(df, country):
    """Return ``(dates, totals)`` for ``country`` from one time-series frame.

    ``dates`` is an array of the date column labels and ``totals`` the
    column-wise sum over the matching rows. All rows are used when
    ``country`` is "world" in any letter case.
    """
    if country.lower() == "world":
        rows = df
    else:
        rows = df[df["country"] == country]
    daily = rows.iloc[:, _FIRST_DATE_COLUMN:]
    dates = np.array(list(daily.columns))
    totals = np.sum(np.asarray(daily), axis=0)
    return dates, totals


def plot_cases_for_country(country):
    """Plot cumulative confirmed cases and deaths over time for ``country``.

    Parameters
    ----------
    country : str
        A value of the ``country`` column, or "World" (any letter case) to
        aggregate every row.

    Draws one line per metric from ``confirmed_df`` and ``deaths_df`` and
    shows the figure; nothing is returned. A name that matches no row yields
    all-zero lines, because the sum over an empty selection is zero.
    """
    # (legend label, source frame, line colour, line width)
    series = (
        ("confirmed", confirmed_df, "blue", 4),
        ("deaths", deaths_df, "red", 5),
    )

    fig = go.Figure()
    for label, df, color, width in series:
        x_data, y_data = _cases_over_time(df, country)
        fig.add_trace(go.Scatter(
            x=x_data,
            y=y_data,
            mode="lines+markers",
            name=label,
            line=dict(color=color, width=width),
            connectgaps=True,
            # The last column holds the most recent cumulative count.
            text="Total " + str(label) + ":" + str(y_data[-1]),
        ))

    fig.update_layout(
        title="COVID 19 cases of " + country,
        xaxis_title='Date',
        # Both confirmed cases and deaths share this axis.
        yaxis_title='No. of Cases',
        margin=dict(l=20, r=20, t=40, b=20),
        paper_bgcolor="lightgrey",
        width=800,
    )
    fig.update_yaxes(type="linear")
    fig.show()
# Static charts for two individual countries.
for country_name in ('US', 'China'):
    plot_cases_for_country(country_name)

# Interactive chart; the text default starts on the worldwide aggregate.
interact(plot_cases_for_country, country='World')

# Green-bordered container for the module-level `fig`. display='none' keeps
# it hidden; remove that argument and re-run the cell to make it visible.
ipywLayout = widgets.Layout(border='solid 2px green', display='none')
widgets.VBox([fig], layout=ipywLayout)
# Identifier columns that are not needed for the ranking charts.
columns_drop = ['fips', 'admin2', 'province_state']

# Both names refer to the same trimmed frame.
overall_sorted_country = sorted_country_df02 = sorted_country_df.drop(columns_drop, axis=1)

# Show the full frame, then its first ten rows.
overall_sorted_country
overall_sorted_country.head(10)
| country | last_update | lat | long_ | confirmed | deaths | recovered | active | combined_key | incident_rate | case_fatality_ratio | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 215 | France | 2021-01-07 05:22:03 | 46.227600 | 2.213700 | 2701215 | 66197 | 174751 | 2460267.0 | France | 4138.301919 | 2.450638 |
| 3958 | United Kingdom | 2021-01-07 05:22:03 | 52.355500 | -1.174300 | 2450983 | 67510 | 0 | 2383473.0 | England, United Kingdom | 4378.538048 | 2.754405 |
| 649 | Turkey | 2021-01-07 05:22:03 | 38.963700 | 35.243300 | 2283931 | 22070 | 2164040 | 97821.0 | Turkey | 2708.034463 | 0.966316 |
| 268 | India | 2021-01-07 05:22:03 | 19.449759 | 76.108221 | 1954553 | 49825 | 1852759 | 51969.0 | Maharashtra, India | 1587.206409 | 2.549176 |
| 6 | Argentina | 2021-01-07 05:22:03 | -38.416100 | -63.616700 | 1676171 | 43976 | 1474048 | 158147.0 | Argentina | 3708.689420 | 2.623599 |
| 65 | Brazil | 2021-01-07 05:22:03 | -23.550500 | -46.633300 | 1501085 | 47511 | 1324431 | 129143.0 | Sao Paulo, Brazil | 3268.981028 | 3.165111 |
| 489 | Poland | 2021-01-07 05:22:03 | 51.919400 | 19.145100 | 1344763 | 30055 | 1087744 | 226964.0 | Poland | 3553.193212 | 2.234966 |
| 286 | Iran | 2021-01-07 05:22:03 | 32.427908 | 53.688046 | 1261903 | 55830 | 1040521 | 165552.0 | Iran | 1502.391516 | 4.424270 |
| 593 | South Africa | 2021-01-07 05:22:03 | -30.559500 | 22.937500 | 1149591 | 31368 | 929239 | 188984.0 | South Africa | 1938.317977 | 2.728623 |
| 263 | India | 2021-01-07 05:22:03 | 14.705180 | 76.166436 | 924137 | 12124 | 902817 | 9196.0 | Karnataka, India | 1367.821581 | 1.311927 |
# The summary rows are per region (e.g. Maharashtra and Karnataka both appear
# under India), so sum the regions of each country before ranking; otherwise
# one country can take several of the ten slots.
confirmed_by_country = (
    overall_sorted_country
    .groupby("country", as_index=False)["confirmed"]
    .sum()
    .sort_values("confirmed", ascending=False)
)
px.bar(
    confirmed_by_country.head(10),
    x="country",
    y="confirmed",
    title="Top 10 worst affected countries",  # chart title
    color_discrete_sequence=["pink"],
    height=500,
    width=800
)
# 20200108 - the dataset has 2 India rows (Maharashtra and Karnataka); they are summed per country above
# Region-level rows ordered by deaths, highest first.
top10countries_death = overall_sorted_country.sort_values('deaths', ascending=False)

# The rows are per region (Brazil, for example, appears more than once), so
# sum the regions of each country before taking the ten largest.
deaths_by_country = (
    top10countries_death
    .groupby("country", as_index=False)["deaths"]
    .sum()
    .sort_values("deaths", ascending=False)
)
px.bar(
    deaths_by_country.head(10),
    x="country",
    y="deaths",
    title="Top 10 worst affected countries",  # chart title
    color_discrete_sequence=["darkturquoise"],
    height=500,
    width=800
)
# the dataset has 2 regions for Brazil as well; they are summed per country above
# Region-level rows ordered by recoveries, highest first.
top10countries_recovered = overall_sorted_country.sort_values('recovered', ascending=False)

# The rows are per region (India, for example, appears more than once), so
# sum the regions of each country before taking the ten largest.
recovered_by_country = (
    top10countries_recovered
    .groupby("country", as_index=False)["recovered"]
    .sum()
    .sort_values("recovered", ascending=False)
)
px.bar(
    recovered_by_country.head(10),
    x="country",
    y="recovered",
    title="Top 10 recovered cases ",  # chart title
    color_discrete_sequence=["green"],
    height=500,
    width=800
)
# the dataset has 2 regions for India as well; they are summed per country above